In [3]:
import warnings
warnings.filterwarnings("ignore")
In [2]:
import pandas as pd
import numpy as np
import os, cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import tensorflow as tf
import keras
from keras.utils.np_utils import to_categorical
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Conv2D, Conv3D, Flatten, MaxPool2D, Dropout, Activation, AvgPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import RMSprop, Adam

import matplotlib.pyplot as plt # for visualization
import matplotlib.image as mpimg
import matplotlib.image as imgplt #for image visualization
import seaborn as sns #Seaborn for visualization
2024-01-08 06:30:57.848126: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-08 06:30:59.765302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [4]:
df = pd.read_csv("datasets/tb-wellgen-smear/v1/tb-labels.csv")
In [6]:
#df['tb_positive'] = df['tb_positive'].astype(str)
df.head()
Out[6]:
image tb_positive file_path
0 tb00000001.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
1 tb00000002.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
2 tb00000003.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
3 tb00000004.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
4 tb00000005.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
In [7]:
df.dtypes
Out[7]:
image          object
tb_positive     int64
file_path      object
dtype: object
In [8]:
df['tb_positive'].value_counts()
Out[8]:
tb_positive
0    71111
1     3976
Name: count, dtype: int64
In [12]:
image_index = 0
image_path = df.loc[image_index, 'file_path']
image = mpimg.imread(image_path)
plt.imshow(image)
plt.title(f"Image: {df.loc[image_index, 'image']}, TB Positive: {df.loc[image_index, 'tb_positive']}")
plt.axis('off')  # Hide axis labels
plt.show()
In [8]:
image_index = 4
image_path = df.loc[image_index, 'file_path']
image = mpimg.imread(image_path)
plt.imshow(image)
plt.title(f"Image: {df.loc[image_index, 'image']}, TB Positive: {df.loc[image_index, 'tb_positive']}")
plt.axis('off')  # Hide axis labels
plt.show()

Sample the data (handling data imbalance)¶

In [3]:
# Select rows where tb_positive is 1
tb_positive_1_sample = df[df['tb_positive'] == 1].sample(n=3000, random_state=42)

# Selectr ows where tb_positive is 0
tb_positive_0_sample = df[df['tb_positive'] == 0].sample(n=3000, random_state=42)

# Concatenate the sampled DataFrames
subset_df = pd.concat([tb_positive_1_sample, tb_positive_0_sample])

# Shuffle the result DataFrame to mix positive and negative samples
subset_df = subset_df.sample(frac=1, random_state=42).reset_index(drop=True)
In [7]:
subset_df.shape
Out[7]:
(6000, 3)
In [34]:
subset_df['tb_positive'].value_counts()
Out[34]:
tb_positive
1    3000
0    3000
Name: count, dtype: int64
In [8]:
subset_df.head(20)
Out[8]:
image tb_positive file_path
0 tb00021398.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
1 tb00069006.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
2 tb00028615.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
3 tb00028976.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
4 tb00063551.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
5 tb00034794.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
6 tb00072507.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
7 tb00023703.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
8 tb00011452.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
9 tb00012749.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
10 tb00053581.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
11 tb00038680.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
12 tb00063095.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
13 tb00012030.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
14 tb00052634.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
15 tb00066090.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
16 tb00036636.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
17 tb00023179.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
18 tb00056604.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
19 tb00024216.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
In [36]:
image_index = 1
image_path = subset_df.loc[image_index, 'file_path']
image = mpimg.imread(image_path)
plt.imshow(image)
plt.title(f"Image: {subset_df.loc[image_index, 'image']}, TB Positive: {subset_df.loc[image_index, 'tb_positive']}")
plt.axis('off')  # Hide axis labels
plt.show()
In [39]:
image_index = 2
image_path = subset_df.loc[image_index, 'file_path']
image = mpimg.imread(image_path)
plt.imshow(image)
plt.title(f"Image: {subset_df.loc[image_index, 'image']}, TB Positive: {subset_df.loc[image_index, 'tb_positive']}")
plt.axis('off')  # Hide axis labels
plt.show()

Try out the initial model on a smaller subset¶

In [5]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
In [12]:
X = subset_df['file_path']
y = subset_df['tb_positive']

# Encode labels (0 and 1)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
In [5]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D  # Added import statements

Train, validation, test¶

In [6]:
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler
In [43]:
# Data augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# For validation and test sets, only rescale
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Specify the target size based on your model's input size
target_size = (224, 224)
batch_size = 32

# Create generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'file_path': X_train, 'tb_positive': y_train}),
    x_col='file_path',
    y_col='tb_positive',
    class_mode='raw',  # 'raw' for binary classification
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True
)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'file_path': X_val, 'tb_positive': y_val}),
    x_col='file_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'file_path': X_test, 'tb_positive': y_test}),
    x_col='file_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)
Found 4320 validated image filenames.
Found 480 validated image filenames.
Found 1200 validated image filenames.

CNN Model¶

In [44]:
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model using fit_generator
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(X_train) // batch_size,
    epochs=30,  # Adjust the number of epochs as needed
    validation_data=validation_generator,
    validation_steps=len(X_val) // batch_size,
    callbacks=[early_stopping]
)
Epoch 1/30
2024-01-07 06:17:13.510106: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - ETA: 0s - loss: 0.7574 - accuracy: 0.6560
2024-01-07 06:20:47.666109: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - 232s 2s/step - loss: 0.7574 - accuracy: 0.6560 - val_loss: 0.7212 - val_accuracy: 0.5063
Epoch 2/30
135/135 [==============================] - 200s 1s/step - loss: 0.6629 - accuracy: 0.6840 - val_loss: 0.6883 - val_accuracy: 0.4917
Epoch 3/30
135/135 [==============================] - 202s 1s/step - loss: 0.6093 - accuracy: 0.7000 - val_loss: 0.9093 - val_accuracy: 0.5063
Epoch 4/30
135/135 [==============================] - 198s 1s/step - loss: 0.5871 - accuracy: 0.7199 - val_loss: 1.2943 - val_accuracy: 0.5063
Epoch 5/30
135/135 [==============================] - 201s 1s/step - loss: 0.5541 - accuracy: 0.7310 - val_loss: 1.6731 - val_accuracy: 0.5063
Epoch 6/30
135/135 [==============================] - 203s 2s/step - loss: 0.5200 - accuracy: 0.7579 - val_loss: 0.6916 - val_accuracy: 0.4750
Epoch 7/30
135/135 [==============================] - 200s 1s/step - loss: 0.5100 - accuracy: 0.7627 - val_loss: 0.5896 - val_accuracy: 0.6958
Epoch 8/30
135/135 [==============================] - 202s 1s/step - loss: 0.4778 - accuracy: 0.7836 - val_loss: 0.7417 - val_accuracy: 0.4958
Epoch 9/30
135/135 [==============================] - 200s 1s/step - loss: 0.4692 - accuracy: 0.7806 - val_loss: 0.7425 - val_accuracy: 0.5729
Epoch 10/30
135/135 [==============================] - 201s 1s/step - loss: 0.4529 - accuracy: 0.8002 - val_loss: 0.6964 - val_accuracy: 0.4792
Epoch 11/30
135/135 [==============================] - 199s 1s/step - loss: 0.4416 - accuracy: 0.8132 - val_loss: 0.6352 - val_accuracy: 0.6417
Epoch 12/30
135/135 [==============================] - 197s 1s/step - loss: 0.4181 - accuracy: 0.8181 - val_loss: 1.7228 - val_accuracy: 0.5000
In [45]:
test_loss, test_accuracy = model.evaluate_generator(test_generator, steps=len(X_test) // batch_size)
print(f"Test Accuracy: {test_accuracy}")
2024-01-07 06:57:49.077942: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
Test Accuracy: 0.6782094836235046
In [46]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Get predicted probabilities for the positive class
y_pred_prob = model.predict_generator(test_generator)

# Compute ROC curve and ROC area
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
2024-01-07 06:58:29.483262: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
In [47]:
# Plot training and validation accuracies
plt.figure(figsize=(12, 6))

# Plot training accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot training loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()
Model might be overfitting (validation loss higher than training loss)¶

The model may be too complex for the given data, capturing noise and specificities of the training set that do not generalize well. Overfitting is often associated with a low training loss but a high validation loss.

In [48]:
y_pred = model.predict_generator(test_generator).flatten() > 0.5
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
2024-01-07 06:59:05.049370: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
In [49]:
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.61      0.93      0.74       586
           1       0.86      0.43      0.58       614

    accuracy                           0.68      1200
   macro avg       0.74      0.68      0.66      1200
weighted avg       0.74      0.68      0.66      1200

Recall rate too low for Class 1 (TB Positive), so we need to do some image preprocessing¶

In [7]:
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from skimage.morphology import remove_small_holes,remove_small_objects,binary_closing,closing,convex_hull_image
from skimage.morphology import skeletonize, thin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,MultiLabelBinarizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.metrics import confusion_matrix,roc_auc_score,f1_score,classification_report
from scipy import stats
#from imblearn.over_sampling import SMOTE 
import pandas as pd
import seaborn as sns
import os
from collections import Counter
In [8]:
# Function definition of image segmentation to separate the potential bacilli objects from image background

def image_segmentation(img):
    
    # Transform the input image to YCbCr color space and calculate the histogram and its first derivative of cr-component 
    img_ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    img_cr = img_ycrcb[:,:,1]       
    cr_hist,_ = np.histogram(img_cr.ravel(),256,[0,256])
    cr_hist_diff = np.diff(cr_hist)

    # Transform the input image to CIE-Lab color space and calculate the histogram and its first derivative of a-component 
    img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    img_a = img_lab[:,:,1]
    a_hist,_ = np.histogram(img_a.ravel(),256,[0,256])
    a_hist_diff = np.diff(a_hist)
    
    int_lvls = np.arange(0,255)
    
    # Calculating the segmentation threshold for cr-component based on its difference histogram
    
    # -18000 is the threshold of cr-component for (1200x900) image. So scaling the threshold as per input image dimensions
    cr_hist_th = int(-18000*(orig_img_shape[0]*orig_img_shape[1])/(1200*900))
    if (int_lvls[cr_hist_diff<=cr_hist_th].size!=0):
        cr_th_i = np.max(int_lvls[cr_hist_diff<=cr_hist_th])
    # If there is no pixel intensity below -18000, take threshold as peak of cr histogram
    else:
        cr_th_i = np.argmax(cr_hist)

    # Calculating the segmentation threshold for a-component based on its difference histogram
    
    # -1000 is the threshold of a-component for (1200x900) image. So scaling the threshold as per input image dimensions
    a_hist_th = int(-1000*(orig_img_shape[0]*orig_img_shape[1])/(1200*900))
    if (int_lvls[a_hist_diff<=a_hist_th].size!=0):
        a_th_i = np.max(int_lvls[a_hist_diff<=a_hist_th])
    # If there is no pixel intensity below -1000, take threshold as peak of a-histogram
    else:
        a_th_i = np.argmax(a_hist)

    
    # Thresholding the cr and a components using the above thresholds and calculating their segmented image outputs
    a_th = a_th_i
    cr_th = cr_th_i
    _, img_a_th = cv2.threshold(img_a, a_th, 255, cv2.THRESH_BINARY)
    _, img_cr_th = cv2.threshold(img_cr, cr_th, 255, cv2.THRESH_BINARY)
    
    # Performing logical AND between both the segmented images to get the final segmented image
    img_seg = cv2.bitwise_and(img_cr_th,img_a_th)

    return img_seg
In [6]:
'''
def image_segmentation(img):
    # Transform the input image to YCbCr color space and calculate the histogram and its first derivative of cr-component 
    img_ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    img_cr = img_ycrcb[:,:,1]       
    cr_hist, _ = np.histogram(img_cr.ravel(), 256, [0, 256])
    cr_hist_diff = np.diff(cr_hist)

    # Transform the input image to CIE-Lab color space and calculate the histogram and its first derivative of a-component 
    img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    img_a = img_lab[:,:,1]
    a_hist, _ = np.histogram(img_a.ravel(), 256, [0, 256])
    a_hist_diff = np.diff(a_hist)
    
    int_lvls = np.arange(0, 255)
    
    # Calculate the segmentation threshold for cr-component based on its difference histogram
    cr_hist_th = int(-18000 * (img.shape[0] * img.shape[1]) / (1200 * 900))
    cr_th_i = np.max(int_lvls[cr_hist_diff <= cr_hist_th]) if (int_lvls[cr_hist_diff <= cr_hist_th].size != 0) else np.argmax(cr_hist)

    # Calculate the segmentation threshold for a-component based on its difference histogram
    a_hist_th = int(-1000 * (img.shape[0] * img.shape[1]) / (1200 * 900))
    a_th_i = np.max(int_lvls[a_hist_diff <= a_hist_th]) if (int_lvls[a_hist_diff <= a_hist_th].size != 0) else np.argmax(a_hist)

    # Threshold the cr and a components using the above thresholds and calculate their segmented image outputs
    a_th = a_th_i
    cr_th = cr_th_i
    _, img_a_th = cv2.threshold(img_a, a_th, 255, cv2.THRESH_BINARY)
    _, img_cr_th = cv2.threshold(img_cr, cr_th, 255, cv2.THRESH_BINARY)
    
    # Perform logical AND between both the segmented images to get the final segmented image
    img_seg = cv2.bitwise_and(img_cr_th, img_a_th)

    return img_seg
'''
In [11]:
image_index = 2 #positive
image_path = subset_df.loc[image_index, 'file_path']
original_image = mpimg.imread(image_path)

# Get the shape of the loaded image
orig_img_shape = original_image.shape

# Perform image segmentation
segmented_image = image_segmentation(original_image)

# Display the original and segmented images side by side
plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title("Original Image")
plt.axis('off')

# Segmented Image
plt.subplot(1, 2, 2)
plt.imshow(segmented_image, cmap='gray')  # Assuming segmented_image is a binary image
plt.title("Segmented Image")
plt.axis('off')

plt.show()
In [12]:
image_index = 4 #negative
image_path = subset_df.loc[image_index, 'file_path']
original_image = mpimg.imread(image_path)

# Get the shape of the loaded image
orig_img_shape = original_image.shape

# Perform image segmentation
segmented_image = image_segmentation(original_image)

# Display the original and segmented images side by side
plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title("Original Image")
plt.axis('off')

# Segmented Image
plt.subplot(1, 2, 2)
plt.imshow(segmented_image, cmap='gray')  # Assuming segmented_image is a binary image
plt.title("Segmented Image")
plt.axis('off')

plt.show()
In [13]:
image_index = 14 #negative
image_path = subset_df.loc[image_index, 'file_path']
original_image = mpimg.imread(image_path)

# Get the shape of the loaded image
orig_img_shape = original_image.shape

# Perform image segmentation
segmented_image = image_segmentation(original_image)

# Display the original and segmented images side by side
plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title("Original Image")
plt.axis('off')

# Segmented Image
plt.subplot(1, 2, 2)
plt.imshow(segmented_image, cmap='gray')  # Assuming segmented_image is a binary image
plt.title("Segmented Image")
plt.axis('off')

plt.show()
In [9]:
import matplotlib.image as mpimg

def refined_image_segmentation(img, orig_img_shape):
    # Transform the input image to YCbCr color space and calculate the histogram and its first derivative of cr-component 
    img_ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    img_cr = img_ycrcb[:,:,1]       
    cr_hist,_ = np.histogram(img_cr.ravel(),256,[0,256])
    cr_hist_diff = np.diff(cr_hist)

    # Transform the input image to CIE-Lab color space and calculate the histogram and its first derivative of a-component 
    img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    img_a = img_lab[:,:,1]
    a_hist,_ = np.histogram(img_a.ravel(),256,[0,256])
    a_hist_diff = np.diff(a_hist)
    
    int_lvls = np.arange(0,255)
    
    # Threshold scaling based on input image dimensions
    scale_factor = (orig_img_shape[0]*orig_img_shape[1])/(1200*900)
    cr_hist_th = int(-18000 * scale_factor)
    a_hist_th = int(-1000 * scale_factor)
    
    # Calculating the segmentation thresholds
    cr_th_i = np.max(int_lvls[cr_hist_diff<=cr_hist_th]) if (int_lvls[cr_hist_diff<=cr_hist_th].size!=0) else np.argmax(cr_hist)
    a_th_i = np.max(int_lvls[a_hist_diff<=a_hist_th]) if (int_lvls[a_hist_diff<=a_hist_th].size!=0) else np.argmax(a_hist)

    # Thresholding the cr and a components using the above thresholds
    _, img_a_th = cv2.threshold(img_a, a_th_i, 255, cv2.THRESH_BINARY)
    _, img_cr_th = cv2.threshold(img_cr, cr_th_i, 255, cv2.THRESH_BINARY)
    
    # Performing logical AND between both the segmented images
    img_seg = cv2.bitwise_and(img_cr_th,img_a_th)

    # Morphological operations to remove noise
    kernel = np.ones((3,3), np.uint8)
    img_seg = cv2.morphologyEx(img_seg, cv2.MORPH_OPEN, kernel, iterations=2) # Opening to remove noise
    img_seg = cv2.morphologyEx(img_seg, cv2.MORPH_CLOSE, kernel, iterations=2) # Closing to join fragmented regions

    # Contour detection to find boundaries of segmented regions
    contours, _ = cv2.findContours(img_seg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Filter contours based on shape characteristics
    for cnt in contours:
        # Calculate contour area and aspect ratio
        area = cv2.contourArea(cnt)
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = float(w)/h
        
        # Filter out non-rod-shaped or too small objects
        if area < 100 or aspect_ratio > 3 or aspect_ratio < 0.3:
            cv2.drawContours(img_seg, [cnt], 0, 0, -1)
    
    return img_seg
In [6]:
image_index = 14  # Index of the image you want to test
image_path = subset_df.loc[image_index, 'file_path']

# Load the original image from the file path
original_image = mpimg.imread(image_path)

# Ensure the image is in BGR format if it's loaded in RGB format
if original_image.shape[2] == 3:  # If the image is RGB
    original_image = original_image[:, :, ::-1]  # Convert RGB to BGR

# Call the refined segmentation function with the loaded image
orig_img_shape = original_image.shape[:2]
segmented_image = refined_image_segmentation(original_image, orig_img_shape)

# Display the original and segmented images side by side
plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title("Original Image")
plt.axis('off')

# Segmented Image
plt.subplot(1, 2, 2)
plt.imshow(segmented_image, cmap='gray')  # Assuming segmented_image is a binary image
plt.title("Segmented Image")
plt.axis('off')

plt.show()
In [7]:
image_index = 2  # Index of the image you want to test
image_path = subset_df.loc[image_index, 'file_path']

# Load the original image from the file path
original_image = mpimg.imread(image_path)

# Ensure the image is in BGR format if it's loaded in RGB format
if original_image.shape[2] == 3:  # If the image is RGB
    original_image = original_image[:, :, ::-1]  # Convert RGB to BGR

# Call the refined segmentation function with the loaded image
orig_img_shape = original_image.shape[:2]
segmented_image = refined_image_segmentation(original_image, orig_img_shape)

# Display the original and segmented images side by side
plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title("Original Image")
plt.axis('off')

# Segmented Image
plt.subplot(1, 2, 2)
plt.imshow(segmented_image, cmap='gray')  # Assuming segmented_image is a binary image
plt.title("Segmented Image")
plt.axis('off')

plt.show()
In [8]:
import os

# Directory to save processed images
processed_dir = 'processed_images'

# Create the directory if it doesn't exist
os.makedirs(processed_dir, exist_ok=True)

subset_df['processed_file_path'] = ''

# Loop through each row in subset_df
for index, row in subset_df.iterrows():
    # Load the original image
    image_path = row['file_path']
    original_image = mpimg.imread(image_path)
    
    if original_image.shape[2] == 3:  # If the image is RGB
        original_image = original_image[:, :, ::-1]  # Convert RGB to BGR

    # Call the refined segmentation function with the loaded image
    orig_img_shape = original_image.shape[:2]
    segmented_image = refined_image_segmentation(original_image, orig_img_shape)

    # Save the processed image with a new filename
    processed_image_path = os.path.join(processed_dir, f'processed_image_{index}.jpg')
    cv2.imwrite(processed_image_path, segmented_image)

    # Update the 'processed_file_path' column with the new path
    subset_df.at[index, 'processed_file_path'] = processed_image_path
In [9]:
subset_df.head()
Out[9]:
image tb_positive file_path processed_file_path
0 tb00021398.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0... processed_images/processed_image_0.jpg
1 tb00069006.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0... processed_images/processed_image_1.jpg
2 tb00028615.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0... processed_images/processed_image_2.jpg
3 tb00028976.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0... processed_images/processed_image_3.jpg
4 tb00063551.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0... processed_images/processed_image_4.jpg
In [16]:
X1 = subset_df['processed_file_path']
y1 = subset_df['tb_positive']

# Encode labels (0 and 1)
label_encoder = LabelEncoder()
y1 = label_encoder.fit_transform(y1)

# Split the data into training and testing sets
X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.2, random_state=42)
X_train1, X_val1, y_train1, y_val1 = train_test_split(X_train1, y_train1, test_size=0.1, random_state=42)

Use the segments as a mask --> need to look at color and shape Take the original image parts that fit the mask and use that for training Highlighting the features (bounding boxes) that it is using for the inference ResNet

In [18]:
# Data augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# For validation and test sets, only rescale
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Specify the target size based on your model's input size
target_size = (224, 224)
batch_size = 32

train_generator1 = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'processed_file_path': X_train1, 'tb_positive': y_train1}),
    x_col='processed_file_path',
    y_col='tb_positive',
    class_mode='raw',  # 'raw' for binary classification
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True
)

validation_generator1 = validation_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'processed_file_path': X_val1, 'tb_positive': y_val1}),
    x_col='processed_file_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)

test_generator1 = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'processed_file_path': X_test1, 'tb_positive': y_test1}),
    x_col='processed_file_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)
Found 4320 validated image filenames.
Found 480 validated image filenames.
Found 1200 validated image filenames.
In [19]:
model_1 = Sequential()

model_1.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Conv2D(64, (3, 3), activation='relu'))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Conv2D(64, (3, 3), activation='relu'))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Conv2D(128, (3, 3), activation='relu'))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Conv2D(128, (3, 3), activation='relu'))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Conv2D(256, (3, 3), activation='relu'))
model_1.add(BatchNormalization())
model_1.add(MaxPooling2D((2, 2)))

model_1.add(Flatten())
model_1.add(BatchNormalization())
model_1.add(Dropout(0.5))
model_1.add(Dense(1, activation='sigmoid'))

# Compile the model
model_1.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model using fit_generator
history1 = model_1.fit_generator(
    train_generator1,
    steps_per_epoch=len(X_train1) // batch_size,
    epochs=30,  # Adjust the number of epochs as needed
    validation_data=validation_generator1,
    validation_steps=len(X_val1) // batch_size,
    callbacks=[early_stopping]
)
/tmp/ipykernel_230/2723705095.py:39: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
  history1 = model_1.fit_generator(
Epoch 1/30
2024-01-07 18:38:09.277221: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - ETA: 0s - loss: 0.8333 - accuracy: 0.5720
2024-01-07 18:40:35.170034: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - 155s 1s/step - loss: 0.8333 - accuracy: 0.5720 - val_loss: 0.6936 - val_accuracy: 0.4938
Epoch 2/30
135/135 [==============================] - 151s 1s/step - loss: 0.7252 - accuracy: 0.5933 - val_loss: 0.6648 - val_accuracy: 0.6354
Epoch 3/30
135/135 [==============================] - 148s 1s/step - loss: 0.6811 - accuracy: 0.6139 - val_loss: 0.6258 - val_accuracy: 0.6375
Epoch 4/30
135/135 [==============================] - 147s 1s/step - loss: 0.6650 - accuracy: 0.6199 - val_loss: 0.6099 - val_accuracy: 0.6583
Epoch 5/30
135/135 [==============================] - 148s 1s/step - loss: 0.6652 - accuracy: 0.6076 - val_loss: 0.6275 - val_accuracy: 0.6396
Epoch 6/30
135/135 [==============================] - 146s 1s/step - loss: 0.6432 - accuracy: 0.6366 - val_loss: 0.6381 - val_accuracy: 0.6187
Epoch 7/30
135/135 [==============================] - 149s 1s/step - loss: 0.6288 - accuracy: 0.6361 - val_loss: 0.6212 - val_accuracy: 0.6667
Epoch 8/30
135/135 [==============================] - 158s 1s/step - loss: 0.6309 - accuracy: 0.6319 - val_loss: 0.6470 - val_accuracy: 0.5833
Epoch 9/30
135/135 [==============================] - 146s 1s/step - loss: 0.6183 - accuracy: 0.6468 - val_loss: 0.6123 - val_accuracy: 0.6771
In [20]:
test_loss, test_accuracy = model_1.evaluate_generator(test_generator1, steps=len(X_test1) // batch_size)
print(f"Test Accuracy: {test_accuracy}")
/tmp/ipykernel_230/3453590097.py:1: UserWarning: `Model.evaluate_generator` is deprecated and will be removed in a future version. Please use `Model.evaluate`, which supports generators.
  test_loss, test_accuracy = model_1.evaluate_generator(test_generator1, steps=len(X_test1) // batch_size)
2024-01-07 19:00:37.630192: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
Test Accuracy: 0.6570945978164673
In [22]:
from sklearn.metrics import roc_curve, auc

# Get predicted probabilities for the positive class
y_pred_prob1 = model_1.predict_generator(test_generator1)

# Compute ROC curve and ROC area
fpr, tpr, _ = roc_curve(y_test1, y_pred_prob1)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
/tmp/ipykernel_230/477039277.py:4: UserWarning: `Model.predict_generator` is deprecated and will be removed in a future version. Please use `Model.predict`, which supports generators.
  y_pred_prob1 = model_1.predict_generator(test_generator1)
2024-01-07 19:25:30.849387: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
In [23]:
# Plot training and validation accuracies
plt.figure(figsize=(12, 6))

# Plot training accuracy
plt.subplot(1, 2, 1)
plt.plot(history1.history['accuracy'], label='Training Accuracy')
plt.plot(history1.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot training loss
plt.subplot(1, 2, 2)
plt.plot(history1.history['loss'], label='Training Loss')
plt.plot(history1.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()
In [24]:
y_pred1 = model_1.predict_generator(test_generator1).flatten() > 0.5
print(classification_report(y_test1, y_pred1))
/tmp/ipykernel_230/4157061623.py:1: UserWarning: `Model.predict_generator` is deprecated and will be removed in a future version. Please use `Model.predict`, which supports generators.
  y_pred1 = model_1.predict_generator(test_generator1).flatten() > 0.5
2024-01-07 19:25:55.929097: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
              precision    recall  f1-score   support

           0       0.61      0.80      0.69       586
           1       0.73      0.52      0.61       614

    accuracy                           0.66      1200
   macro avg       0.67      0.66      0.65      1200
weighted avg       0.67      0.66      0.65      1200

Compare with classification report - from pre imageprocessing¶

          precision    recall  f1-score   support

       0       0.61      0.93      0.74       586
       1       0.86      0.43      0.58       614

accuracy                           0.68      1200

Comments:¶

Recall rate for Class 1 improved (0.43 to 0.52), but precision is now lower. Overall performance is better with the preprocessing steps, but not good enough.

In [25]:
model_2 = Sequential()

# 2 Convolution layers with 32 (3x3) filters
model_2.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

model_2.add(Conv2D(32, (3, 3), activation='relu'))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

# 3 Convolution layers with 64 (3x3) filters
model_2.add(Conv2D(64, (3, 3), activation='relu'))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

model_2.add(Conv2D(64, (3, 3), activation='relu'))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

model_2.add(Conv2D(64, (3, 3), activation='relu'))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

# 1 Convolution layer with 128 (3x3) filters
model_2.add(Conv2D(128, (3, 3), activation='relu'))
model_2.add(BatchNormalization())
model_2.add(MaxPooling2D((2, 2)))

# Fully connected layer with 128 neurons
model_2.add(Flatten())
model_2.add(Dense(128, activation='relu'))
model_2.add(BatchNormalization())
model_2.add(Dropout(0.5))

# Sigmoid neuron for binary classification
model_2.add(Dense(1, activation='sigmoid'))

# Compile the model
model_2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Assume train_generator2 and validation_generator2 are already defined and loaded with the preprocessed images
# Train the model using fit
history2 = model_2.fit(
    train_generator1,
    steps_per_epoch=len(train_generator1),
    epochs=30,  # Adjust the number of epochs as needed
    validation_data=validation_generator1,
    validation_steps=len(validation_generator1),
    callbacks=[early_stopping]
)
Epoch 1/30
2024-01-07 19:50:07.642605: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - ETA: 0s - loss: 0.8251 - accuracy: 0.5674
2024-01-07 19:52:36.743098: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
135/135 [==============================] - 158s 1s/step - loss: 0.8251 - accuracy: 0.5674 - val_loss: 0.7018 - val_accuracy: 0.4958
Epoch 2/30
135/135 [==============================] - 138s 1s/step - loss: 0.7382 - accuracy: 0.5921 - val_loss: 0.6843 - val_accuracy: 0.5604
Epoch 3/30
135/135 [==============================] - 134s 992ms/step - loss: 0.6949 - accuracy: 0.6095 - val_loss: 0.6371 - val_accuracy: 0.6146
Epoch 4/30
135/135 [==============================] - 139s 1s/step - loss: 0.6681 - accuracy: 0.6088 - val_loss: 0.6139 - val_accuracy: 0.6458
Epoch 5/30
135/135 [==============================] - 138s 1s/step - loss: 0.6554 - accuracy: 0.6134 - val_loss: 0.6069 - val_accuracy: 0.6562
Epoch 6/30
135/135 [==============================] - 136s 1s/step - loss: 0.6380 - accuracy: 0.6266 - val_loss: 0.6135 - val_accuracy: 0.6479
Epoch 7/30
135/135 [==============================] - 140s 1s/step - loss: 0.6323 - accuracy: 0.6303 - val_loss: 0.6354 - val_accuracy: 0.6500
Epoch 8/30
135/135 [==============================] - 136s 1s/step - loss: 0.6239 - accuracy: 0.6403 - val_loss: 0.6244 - val_accuracy: 0.6458
Epoch 9/30
135/135 [==============================] - 140s 1s/step - loss: 0.6283 - accuracy: 0.6310 - val_loss: 0.6179 - val_accuracy: 0.6625
Epoch 10/30
135/135 [==============================] - 135s 1s/step - loss: 0.6165 - accuracy: 0.6514 - val_loss: 0.6176 - val_accuracy: 0.6500
In [26]:
test_loss, test_accuracy = model_2.evaluate_generator(test_generator1, steps=len(X_test1) // batch_size)
print(f"Test Accuracy: {test_accuracy}")
/tmp/ipykernel_230/2817440312.py:1: UserWarning: `Model.evaluate_generator` is deprecated and will be removed in a future version. Please use `Model.evaluate`, which supports generators.
  test_loss, test_accuracy = model_2.evaluate_generator(test_generator1, steps=len(X_test1) // batch_size)
2024-01-07 20:26:04.553264: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
Test Accuracy: 0.6613175868988037
In [27]:
y_pred2 = model_2.predict_generator(test_generator1).flatten() > 0.5
print(classification_report(y_test1, y_pred2))
/tmp/ipykernel_230/3609707038.py:1: UserWarning: `Model.predict_generator` is deprecated and will be removed in a future version. Please use `Model.predict`, which supports generators.
  y_pred2 = model_2.predict_generator(test_generator1).flatten() > 0.5
2024-01-07 20:27:00.402893: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
              precision    recall  f1-score   support

           0       0.62      0.81      0.70       586
           1       0.74      0.52      0.61       614

    accuracy                           0.66      1200
   macro avg       0.68      0.66      0.66      1200
weighted avg       0.68      0.66      0.66      1200

Extract patches¶

In [10]:
# Select rows where tb_positive is 1
tb_positive_1_sample1 = df[df['tb_positive'] == 1].sample(n=300, random_state=42)

# Selectr ows where tb_positive is 0
tb_positive_0_sample1 = df[df['tb_positive'] == 0].sample(n=300, random_state=42)

# Concatenate the sampled DataFrames
subset_df1 = pd.concat([tb_positive_1_sample1, tb_positive_0_sample1])

# Shuffle the result DataFrame to mix positive and negative samples
subset_df1 = subset_df1.sample(frac=1, random_state=42).reset_index(drop=True)
In [11]:
subset_df1.shape
Out[11]:
(600, 3)
In [8]:
subset_df1.head(10)
Out[8]:
image tb_positive file_path
0 tb00034641.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
1 tb00057150.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
2 tb00039106.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
3 tb00053050.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
4 tb00030266.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
5 tb00000398.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
6 tb00026207.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
7 tb00030844.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
8 tb00043678.jpg 1 /home/ngsci/datasets/tb-wellgen-smear/images/0...
9 tb00006079.jpg 0 /home/ngsci/datasets/tb-wellgen-smear/images/0...
In [11]:
def extract_patches_from_segmented(original_img, segmented_img, num_patches=3, patch_size=(224, 224)):
    contours, _ = cv2.findContours(segmented_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:num_patches]
    patches = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        patch = original_img[y:y+h, x:x+w]
        patch = cv2.resize(patch, patch_size)
        patches.append(patch)
    return patches
In [12]:
def show_image_with_patches(original_img, patches):
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 4, 1)
    plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB
    plt.title('Original Image')
    plt.axis('off')

    for i, patch in enumerate(patches):
        plt.subplot(1, 4, i+2)
        plt.imshow(cv2.cvtColor(patch, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB
        plt.title(f'Patch {i+1}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()
In [37]:
image_index = 2  # Index of the image you want to test
image_path = subset_df1.loc[image_index, 'file_path']

# Load the original image from the file path
original_img = mpimg.imread(image_path)
# Apply the refined image segmentation
segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])

# Ensure the segmented image is binary (0 or 255)
segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]

# Extract patches from the original image
patches = extract_patches_from_segmented(original_img, segmented_img, num_patches=3, patch_size=(224, 224))

# Display the original image and the patches
show_image_with_patches(original_img, patches)
In [42]:
image_index = 5  # Index of the image you want to test
image_path = subset_df1.loc[image_index, 'file_path']

# Load the original image from the file path
original_img = mpimg.imread(image_path)
# Apply the refined image segmentation
segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])

# Ensure the segmented image is binary (0 or 255)
segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]

# Extract patches from the original image
patches = extract_patches_from_segmented(original_img, segmented_img, num_patches=3, patch_size=(224, 224))

# Display the original image and the patches
show_image_with_patches(original_img, patches)
In [18]:
def extract_and_save_patches_with_labels(subset_df, new_directory, patch_size=(224, 224), num_patches=3):
    # Initialize a list to collect dictionaries of patch data
    patch_data_list = []

    if not os.path.exists(new_directory):
        os.makedirs(new_directory)

    for idx, row in subset_df.iterrows():
        original_img = cv2.imread(row['file_path'])
        tb_label = row['tb_positive']
        
        segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])
        segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]
        
        patches = extract_patches_from_segmented(original_img, segmented_img, num_patches=num_patches, patch_size=patch_size)
        
        for i, patch in enumerate(patches):
            patch_name = f"patch_{row['image'].split('.')[0]}_{i}.png"
            patch_path = os.path.join(new_directory, patch_name)
            cv2.imwrite(patch_path, patch)
            
            # Append a dictionary of patch data to the list
            patch_data_list.append({'patch_image': patch_name, 'tb_positive': tb_label})
    
    # Convert the list of dictionaries to a DataFrame
    new_df = pd.DataFrame(patch_data_list)

    new_df_csv_path = os.path.join(new_directory, 'patches_and_labels.csv')
    new_df.to_csv(new_df_csv_path, index=False)

    return new_df_csv_path  # Return the path to the new CSV file for reference

# Make sure to define subset_df1, refined_image_segmentation, and extract_patches_from_segmented before this point
# Also, ensure that subset_df1 has the correct columns and data
new_directory = 'patched_images' 

# Call the function to extract and save patches with labels
csv_path = extract_and_save_patches_with_labels(subset_df1, new_directory)
print(f"Patches and labels saved to {csv_path}")
Patches and labels saved to patched_images/patches_and_labels.csv
In [19]:
patch_df = pd.read_csv('patched_images/patches_and_labels.csv')
patch_df['patch_image_path'] = patch_df['patch_image'].apply(lambda x: os.path.join(new_directory, x))
In [22]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

X2 = patch_df['patch_image_path']
y2 = patch_df['tb_positive']
label_encoder = LabelEncoder()
y2 = label_encoder.fit_transform(y2)
In [23]:
# Split the data into training and testing sets
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2, random_state=42)
X_train2, X_val2, y_train2, y_val2 = train_test_split(X_train2, y_train2, test_size=0.1, random_state=42)
In [24]:
target_size = (224, 224)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator2 = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_train2, 'tb_positive': y_train2}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',  # 'raw' for binary classification
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True
)

validation_generator2 = validation_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_val2, 'tb_positive': y_val2}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)

test_generator2 = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_test2, 'tb_positive': y_test2}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)
Found 1137 validated image filenames.
Found 127 validated image filenames.
Found 316 validated image filenames.
In [25]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Define model_3 with the same architecture as model_2
model_3 = Sequential()

# 2 Convolution layers with 32 (3x3) filters
model_3.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

model_3.add(Conv2D(32, (3, 3), activation='relu'))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

# 3 Convolution layers with 64 (3x3) filters
model_3.add(Conv2D(64, (3, 3), activation='relu'))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

model_3.add(Conv2D(64, (3, 3), activation='relu'))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

model_3.add(Conv2D(64, (3, 3), activation='relu'))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

# 1 Convolution layer with 128 (3x3) filters
model_3.add(Conv2D(128, (3, 3), activation='relu'))
model_3.add(BatchNormalization())
model_3.add(MaxPooling2D((2, 2)))

# Fully connected layer with 128 neurons
model_3.add(Flatten())
model_3.add(Dense(128, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dropout(0.5))

# Sigmoid neuron for binary classification
model_3.add(Dense(1, activation='sigmoid'))

# Compile the model
model_3.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model using fit with the patch data generators
history3 = model_3.fit(
    train_generator2,
    steps_per_epoch=len(train_generator2),
    epochs=30,  # Adjust the number of epochs as needed
    validation_data=validation_generator2,
    validation_steps=len(validation_generator2),
    callbacks=[early_stopping]
)
Epoch 1/30
2024-01-08 01:33:49.539764: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
36/36 [==============================] - ETA: 0s - loss: 0.7483 - accuracy: 0.6535
2024-01-08 01:34:07.414096: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
36/36 [==============================] - 19s 467ms/step - loss: 0.7483 - accuracy: 0.6535 - val_loss: 0.7005 - val_accuracy: 0.4488
Epoch 2/30
36/36 [==============================] - 16s 436ms/step - loss: 0.6989 - accuracy: 0.6544 - val_loss: 0.7721 - val_accuracy: 0.4567
Epoch 3/30
36/36 [==============================] - 16s 445ms/step - loss: 0.6796 - accuracy: 0.6667 - val_loss: 0.8172 - val_accuracy: 0.4646
Epoch 4/30
36/36 [==============================] - 16s 435ms/step - loss: 0.6552 - accuracy: 0.6895 - val_loss: 0.7012 - val_accuracy: 0.5118
Epoch 5/30
36/36 [==============================] - 16s 435ms/step - loss: 0.6240 - accuracy: 0.7071 - val_loss: 1.0093 - val_accuracy: 0.4567
Epoch 6/30
36/36 [==============================] - 16s 442ms/step - loss: 0.5883 - accuracy: 0.6992 - val_loss: 0.7398 - val_accuracy: 0.5512
In [26]:
test_loss, test_accuracy = model_3.evaluate_generator(test_generator2, steps=len(X_test2) // batch_size)
print(f"Test Accuracy: {test_accuracy}")
2024-01-08 01:36:47.144290: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
Test Accuracy: 0.5659722089767456
In [27]:
y_pred3 = model_3.predict_generator(test_generator2).flatten() > 0.5
print(classification_report(y_test2, y_pred3))
2024-01-08 01:39:35.046215: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       142
           1       0.55      1.00      0.71       174

    accuracy                           0.55       316
   macro avg       0.28      0.50      0.36       316
weighted avg       0.30      0.55      0.39       316

Using the extract_patches_from_segmented function only for positive labeled images and performing random cropping for negative labeled images may be a valid strategy, particularly since the negative images do not contain super specific features that segmentation would highlight. This approach could provide a more balanced view of the negative class.

Improved function for patches¶

In [1]:
import random

def random_crop(img, patch_size=(224, 224), num_patches=3):
    patches = []
    for _ in range(num_patches):
        max_x = img.shape[1] - patch_size[1]
        max_y = img.shape[0] - patch_size[0]
        x = random.randint(0, max_x)
        y = random.randint(0, max_y)
        patch = img[y:y + patch_size[0], x:x + patch_size[1]]
        patches.append(patch)
    return patches
In [13]:
from skimage import measure
from random import sample

def extract_patches(original_img, segmented_img, patch_size=(224, 224), num_patches=3):
    # Apply the refined segmentation function to the segmented image
    refined_segmentation = segmented_img

    # Find contours from the refined segmentation
    contours, _ = cv2.findContours(refined_segmentation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Initialize a list to hold the coordinates of potential TB bacilli areas
    potential_tb_areas = []

    # Loop through each contour to filter out non-rod shapes or small areas
    for cnt in contours:
        # Calculate contour area and aspect ratio
        area = cv2.contourArea(cnt)
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = float(w)/h

        # Conditions to identify potential TB bacilli (rod-shaped and pink-stained)
        if area >= 100 and 0.3 <= aspect_ratio <= 3:
            potential_tb_areas.append((x, y, w, h))

    # Randomly select contours if there are more than the required number of patches
    if len(potential_tb_areas) > num_patches:
        selected_areas = sample(potential_tb_areas, num_patches)
    else:
        selected_areas = potential_tb_areas

    # Create the patches list
    patches = []

    # Extract patches based on the selected areas
    for (x, y, w, h) in selected_areas:
        # Calculate the center of the area
        center_x = x + w//2
        center_y = y + h//2

        # Define the top-left corner of the patch
        start_x = max(center_x - patch_size[0]//2, 0)
        start_y = max(center_y - patch_size[1]//2, 0)

        # Ensure the patch doesn't go outside the image boundaries
        if start_x + patch_size[0] > original_img.shape[1]:
            start_x = original_img.shape[1] - patch_size[0]
        if start_y + patch_size[1] > original_img.shape[0]:
            start_y = original_img.shape[0] - patch_size[1]

        # Extract the patch and append to the list
        patch = original_img[start_y:start_y + patch_size[1], start_x:start_x + patch_size[0]]
        patches.append(patch)

    return patches
'''
image_index = 2  # Index of the image you want to test
image_path = subset_df1.loc[image_index, 'file_path']

# Load the original image from the file path
original_img = mpimg.imread(image_path)
# Apply the refined image segmentation
segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])

# Ensure the segmented image is binary (0 or 255)
#segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]

# Call the function to get patches
patches = extract_patches(original_img, segmented_img)
'''
Out[13]:
"\nimage_index = 2  # Index of the image you want to test\nimage_path = subset_df1.loc[image_index, 'file_path']\n\n# Load the original image from the file path\noriginal_img = mpimg.imread(image_path)\n# Apply the refined image segmentation\nsegmented_img = refined_image_segmentation(original_img, original_img.shape[:2])\n\n# Ensure the segmented image is binary (0 or 255)\n#segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]\n\n# Call the function to get patches\npatches = extract_patches(original_img, segmented_img)\n"
In [7]:
for i, patch in enumerate(patches):
    patch_filename = f'patch_{i+1}.png'
    cv2.imwrite(patch_filename, patch)

from IPython.display import Image, display

for i in range(len(patches)):
    patch_filename = f'patch_{i+1}.png'
    display(Image(patch_filename))
In [9]:
image_index = 4  # Index of the image you want to test
image_path = subset_df1.loc[image_index, 'file_path']

# Load the original image from the file path
original_img = mpimg.imread(image_path)
# Apply the refined image segmentation
segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])

# Ensure the segmented image is binary (0 or 255)
#segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]

# Call the function to get patches
patches = extract_patches(original_img, segmented_img)

for i, patch in enumerate(patches):
    patch_filename = f'patch_{i+1}.png'
    cv2.imwrite(patch_filename, patch)

for i in range(len(patches)):
    patch_filename = f'patch_{i+1}.png'
    display(Image(patch_filename))
In [14]:
def extract_and_save_patches_with_labels(subset_df, new_directory, patch_size=(224, 224), num_patches=3):
    patch_data_list = []

    if not os.path.exists(new_directory):
        os.makedirs(new_directory)

    for idx, row in subset_df.iterrows():
        original_img = cv2.imread(row['file_path'])
        tb_label = row['tb_positive']
        
        if tb_label == 1:  # For positive labeled images
            segmented_img = refined_image_segmentation(original_img, original_img.shape[:2])
            #segmented_img = cv2.threshold(segmented_img, 127, 255, cv2.THRESH_BINARY)[1]
            patches = extract_patches(original_img, segmented_img, patch_size=patch_size, num_patches=num_patches)
        else:  # For negative labeled images
            patches = random_crop(original_img, patch_size=patch_size, num_patches=num_patches)
        
        for i, patch in enumerate(patches):
            patch_name = f"patch_{row['image'].split('.')[0]}_{i}.png"
            patch_path = os.path.join(new_directory, patch_name)
            cv2.imwrite(patch_path, patch)
            patch_data_list.append({'patch_image': patch_name, 'tb_positive': tb_label})
    
    new_df = pd.DataFrame(patch_data_list)
    new_df_csv_path = os.path.join(new_directory, 'patches_and_labels.csv')
    new_df.to_csv(new_df_csv_path, index=False)

    return new_df_csv_path

# New directory name
new_directory = 'patched_images1'  

# Call the function to extract and save patches with labels
csv_path = extract_and_save_patches_with_labels(subset_df1, new_directory)
print(f"Patches and labels saved to {csv_path}")
Patches and labels saved to patched_images1/patches_and_labels.csv
In [15]:
patch_df1 = pd.read_csv('patched_images1/patches_and_labels.csv')
patch_df1['patch_image_path'] = patch_df1['patch_image'].apply(lambda x: os.path.join(new_directory, x))
In [16]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

X3 = patch_df1['patch_image_path']
y3 = patch_df1['tb_positive']
label_encoder = LabelEncoder()
y3 = label_encoder.fit_transform(y3)
In [17]:
# Split the data into training and testing sets
X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, test_size=0.2, random_state=42)
X_train3, X_val3, y_train3, y_val3 = train_test_split(X_train3, y_train3, test_size=0.1, random_state=42)
In [18]:
target_size = (224, 224)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator3 = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_train3, 'tb_positive': y_train3}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',  # 'raw' for binary classification
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True
)

validation_generator3 = validation_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_val3, 'tb_positive': y_val3}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)

test_generator3 = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'patch_image_path': X_test3, 'tb_positive': y_test3}),
    x_col='patch_image_path',
    y_col='tb_positive',
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False  # Set to False for validation and test sets
)
Found 1239 validated image filenames.
Found 138 validated image filenames.
Found 345 validated image filenames.
In [19]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

model_4 = Sequential()

# 2 Convolution layers with 32 (3x3) filters
model_4.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

model_4.add(Conv2D(32, (3, 3), activation='relu'))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

# 3 Convolution layers with 64 (3x3) filters
model_4.add(Conv2D(64, (3, 3), activation='relu'))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

model_4.add(Conv2D(64, (3, 3), activation='relu'))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

model_4.add(Conv2D(64, (3, 3), activation='relu'))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

# 1 Convolution layer with 128 (3x3) filters
model_4.add(Conv2D(128, (3, 3), activation='relu'))
model_4.add(BatchNormalization())
model_4.add(MaxPooling2D((2, 2)))

# Fully connected layer with 128 neurons
model_4.add(Flatten())
model_4.add(Dense(128, activation='relu'))
model_4.add(BatchNormalization())
model_4.add(Dropout(0.5))

# Sigmoid neuron for binary classification
model_4.add(Dense(1, activation='sigmoid'))

# Compile the model
model_4.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model using fit with the patch data generators
history4 = model_4.fit(
    train_generator3,
    steps_per_epoch=len(train_generator3),
    epochs=30,  # Adjust the number of epochs as needed
    validation_data=validation_generator3,
    validation_steps=len(validation_generator3),
    callbacks=[early_stopping]
)
Epoch 1/30
2024-01-08 06:43:53.367448: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
39/39 [==============================] - ETA: 0s - loss: 0.4465 - accuracy: 0.8184
2024-01-08 06:44:12.684984: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
39/39 [==============================] - 20s 476ms/step - loss: 0.4465 - accuracy: 0.8184 - val_loss: 0.7362 - val_accuracy: 0.5725
Epoch 2/30
39/39 [==============================] - 18s 445ms/step - loss: 0.2672 - accuracy: 0.9015 - val_loss: 0.7732 - val_accuracy: 0.5725
Epoch 3/30
39/39 [==============================] - 18s 442ms/step - loss: 0.2193 - accuracy: 0.9314 - val_loss: 1.2834 - val_accuracy: 0.5725
Epoch 4/30
39/39 [==============================] - 18s 443ms/step - loss: 0.2090 - accuracy: 0.9322 - val_loss: 1.2117 - val_accuracy: 0.5725
Epoch 5/30
39/39 [==============================] - 17s 436ms/step - loss: 0.1905 - accuracy: 0.9306 - val_loss: 1.4126 - val_accuracy: 0.5725
Epoch 6/30
39/39 [==============================] - 17s 439ms/step - loss: 0.1777 - accuracy: 0.9427 - val_loss: 1.4918 - val_accuracy: 0.5725
In [20]:
test_loss, test_accuracy = model_4.evaluate_generator(test_generator3, steps=len(X_test3) // batch_size)
print(f"Test Accuracy: {test_accuracy}")
2024-01-08 06:46:22.630992: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
Test Accuracy: 0.53125
In [25]:
from sklearn.metrics import roc_curve, auc

# Get predicted probabilities for the positive class
y_pred_prob4 = model_4.predict_generator(test_generator3)

# Compute ROC curve and ROC area
fpr, tpr, _ = roc_curve(y_test3, y_pred_prob4)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
2024-01-08 06:50:46.067313: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
In [26]:
predictions = model_4.predict(test_generator3)

# Convert predicted probabilities to binary labels (0 or 1)
predicted_labels = (predictions > 0.5).astype(int)

# Calculate the classification report
report = classification_report(y_test3, predicted_labels)

# Print the classification report
print(report)
 1/11 [=>............................] - ETA: 1s
2024-01-08 06:52:40.369217: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
11/11 [==============================] - 1s 110ms/step
              precision    recall  f1-score   support

           0       0.52      1.00      0.69       181
           1       0.00      0.00      0.00       164

    accuracy                           0.52       345
   macro avg       0.26      0.50      0.34       345
weighted avg       0.28      0.52      0.36       345

In [ ]: